{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. Detect Nodules from Kaggle Dataset\n",
    "\n",
    "## Summary\n",
    "* load and process kaggle dataset\n",
    "* Generate prediction masks with trained unet\n",
    "* Reduce false positives with trained CNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:64: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:66: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:69: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:71: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:74: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:76: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:79: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:81: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:84: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(512, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:86: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(512, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:88: UserWarning: The `merge` function is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\keras\\legacy\\layers.py:458: UserWarning: The `Merge` layer is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
      "  name=name)\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:90: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:91: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:93: UserWarning: The `merge` function is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:95: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:96: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:98: UserWarning: The `merge` function is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:100: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:101: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:103: UserWarning: The `merge` function is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:105: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:106: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:107: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(1, (1, 1), activation=\"sigmoid\")`\n",
      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:109: UserWarning: Update your `Model` call to the Keras 2 API: `Model(outputs=Tensor(\"co..., inputs=Tensor(\"in...)`\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "### EDIT HERE ###\n",
    "\n",
    "unetweightspath=\"modelweights/unet-weights-improvement.hdf5\"\n",
    "truenoduleweightspath=\"modelweights/truenodule-cnn-weights-improvement.hdf5\"\n",
    "INPUT_FOLDER = 'stage1/' #path to kaggle stage1 dataset\n",
    "datafolder=\"processeddata/\"\n",
    "\n",
    "####################\n",
    "import numpy as np # linear algebra\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import dicom\n",
    "import os\n",
    "import scipy.ndimage\n",
    "import time\n",
    "from keras.callbacks import ModelCheckpoint\n",
    "import h5py\n",
    "from sklearn.cluster import KMeans\n",
    "from skimage import measure, morphology\n",
    "import cell_magic_wand as cmw\n",
    "import numpy as np\n",
    "import csv\n",
    "import random\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import RandomForestClassifier as RF\n",
    "from sklearn.metrics import confusion_matrix, classification_report\n",
    "from scipy.ndimage.measurements import center_of_mass, label\n",
    "from skimage.measure import regionprops\n",
    "\n",
    "import keras\n",
    "from keras.models import Sequential,load_model,Model\n",
    "from keras.layers import Dense, Dropout, Activation, Flatten\n",
    "from keras.layers import Conv2D, MaxPooling2D, SpatialDropout2D\n",
    "from keras.layers import Input, merge, UpSampling2D, BatchNormalization\n",
    "from keras.optimizers import Adam\n",
    "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
    "from keras import backend as K\n",
    "from keras.utils import to_categorical\n",
    "from keras.datasets import mnist\n",
    "from keras.models import Sequential\n",
    "from keras import backend as K\n",
    "from keras.optimizers import Adam\n",
    "# Some constants \n",
    "\n",
    "patients = os.listdir(INPUT_FOLDER)\n",
    "#patients=patients.sort()\n",
    "K.set_image_dim_ordering('th') \n",
    "\n",
    "#Code sourced from https://www.kaggle.com/c/data-science-bowl-2017#tutorial\n",
    "smooth = 1.0\n",
    "width = 32\n",
    "\n",
    "def dice_coef(y_true, y_pred):\n",
    "    y_true_f = K.flatten(y_true)\n",
    "    y_pred_f = K.flatten(y_pred)\n",
    "    intersection = K.sum(y_true_f * y_pred_f)\n",
    "    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)\n",
    "def dice_coef_loss(y_true, y_pred):\n",
    "    return -dice_coef(y_true, y_pred)\n",
    "\n",
    "def unet_model():\n",
    "    inputs = Input((1, 512, 512))\n",
    "    conv1 = Conv2D(width, 3, 3, activation='relu', border_mode='same')(inputs)\n",
    "    conv1 = BatchNormalization(axis = 1)(conv1)\n",
    "    conv1 = Conv2D(width, 3, 3, activation='relu', border_mode='same')(conv1)\n",
    "    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)\n",
    "\n",
    "    conv2 = Conv2D(width*2, 3, 3, activation='relu', border_mode='same')(pool1)\n",
    "    conv2 = BatchNormalization(axis = 1)(conv2)\n",
    "    conv2 = Conv2D(width*2, 3, 3, activation='relu', border_mode='same')(conv2)\n",
    "    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)\n",
    "\n",
    "    conv3 = Conv2D(width*4, 3, 3, activation='relu', border_mode='same')(pool2)\n",
    "    conv3 = BatchNormalization(axis = 1)(conv3)\n",
    "    conv3 = Conv2D(width*4, 3, 3, activation='relu', border_mode='same')(conv3)\n",
    "    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)\n",
    "\n",
    "    conv4 = Conv2D(width*8, 3, 3, activation='relu', border_mode='same')(pool3)\n",
    "    conv4 = BatchNormalization(axis = 1)(conv4)\n",
    "    conv4 = Conv2D(width*8, 3, 3, activation='relu', border_mode='same')(conv4)\n",
    "    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)\n",
    "\n",
    "    conv5 = Conv2D(width*16, 3, 3, activation='relu', border_mode='same')(pool4)\n",
    "    conv5 = BatchNormalization(axis = 1)(conv5)\n",
    "    conv5 = Conv2D(width*16, 3, 3, activation='relu', border_mode='same')(conv5)\n",
    "\n",
    "    up6 = merge([UpSampling2D(size=(2, 2))(conv5), conv4], mode='concat', concat_axis=1)\n",
    "    conv6 = SpatialDropout2D(0.35)(up6)\n",
    "    conv6 = Conv2D(width*8, 3, 3, activation='relu', border_mode='same')(conv6)\n",
    "    conv6 = Conv2D(width*8, 3, 3, activation='relu', border_mode='same')(conv6)\n",
    "\n",
    "    up7 = merge([UpSampling2D(size=(2, 2))(conv6), conv3], mode='concat', concat_axis=1)\n",
    "    conv7 = SpatialDropout2D(0.35)(up7)\n",
    "    conv7 = Conv2D(width*4, 3, 3, activation='relu', border_mode='same')(conv7)\n",
    "    conv7 = Conv2D(width*4, 3, 3, activation='relu', border_mode='same')(conv7)\n",
    "\n",
    "    up8 = merge([UpSampling2D(size=(2, 2))(conv7), conv2], mode='concat', concat_axis=1)\n",
    "    conv8 = SpatialDropout2D(0.35)(up8)\n",
    "    conv8 = Conv2D(width*2, 3, 3, activation='relu', border_mode='same')(conv8)\n",
    "    conv8 = Conv2D(width*2, 3, 3, activation='relu', border_mode='same')(conv8)\n",
    "\n",
    "    up9 = merge([UpSampling2D(size=(2, 2))(conv8), conv1], mode='concat', concat_axis=1)\n",
    "    conv9 = SpatialDropout2D(0.35)(up9)\n",
    "    conv9 = Conv2D(width, 3, 3, activation='relu', border_mode='same')(conv9)\n",
    "    conv9 = Conv2D(width, 3, 3, activation='relu', border_mode='same')(conv9)\n",
    "    conv10 = Conv2D(1, 1, 1, activation='sigmoid')(conv9)\n",
    "\n",
    "    model = Model(input=inputs, output=conv10)\n",
    "    model.compile(optimizer=Adam(lr=1e-5), loss=dice_coef_loss, metrics=[dice_coef])\n",
    "    return model\n",
    "\n",
    "unet_model=unet_model()\n",
    "unet_model.load_weights(unetweightspath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#classify as nodule or non-nodule\n",
    "input_shape=(1,512,512)\n",
    "num_classes=2\n",
    "model = Sequential()\n",
    "model.add(Conv2D(8, kernel_size=(3, 3),\n",
    "                 activation='relu',\n",
    "                 input_shape=input_shape))\n",
    "model.add(Conv2D(16, (3, 3), activation='relu'))\n",
    "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
    "model.add(Dropout(0.25))\n",
    "model.add(Flatten())\n",
    "model.add(Dense(32, activation='relu'))\n",
    "model.add(Dropout(0.5))\n",
    "model.add(Dense(num_classes, activation='softmax'))\n",
    "\n",
    "model.compile(loss=keras.losses.binary_crossentropy,\n",
    "              optimizer=Adam(lr=1e-5),\n",
    "              metrics=['accuracy'])\n",
    "\n",
    "\n",
    "model.load_weights(truenoduleweightspath)\n",
    "#os.environ[\"PATH\"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'\n",
    "#plot_model(model, to_file=\"CNNdiagram.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the scans in given folder path\n",
    "def load_scan(path):\n",
    "    # code sourced from https://www.kaggle.com/gzuidhof/full-preprocessing-tutorial\n",
    "    slices = [dicom.read_file(path + '/' + s, force=True) for s in os.listdir(path) if s.endswith('.dcm')]\n",
    "    slices.sort(key = lambda x: float(x.ImagePositionPatient[2]), reverse=True)\n",
    "    try:\n",
    "        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])\n",
    "    except:\n",
    "        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)\n",
    "        \n",
    "    for s in slices:\n",
    "        s.SliceThickness = slice_thickness\n",
    "        \n",
    "    return slices\n",
    "\n",
    "def get_pixels_hu(slices):\n",
    "    #code sourced from https://www.kaggle.com/gzuidhof/full-preprocessing-tutorial\n",
    "    image = np.stack([s.pixel_array for s in slices])\n",
    "    # Convert to int16 (from sometimes int16), \n",
    "    # should be possible as values should always be low enough (<32k)\n",
    "    image = image.astype(np.int16)\n",
    "\n",
    "    # Set outside-of-scan pixels to 0\n",
    "    # The intercept is usually -1024, so air is approximately 0\n",
    "    image[image == -2000] = 0\n",
    "    \n",
    "    # Convert to Hounsfield units (HU)\n",
    "    for slice_number in range(len(slices)):\n",
    "        \n",
    "        intercept = slices[slice_number].RescaleIntercept\n",
    "        slope = slices[slice_number].RescaleSlope\n",
    "        \n",
    "        if slope != 1:\n",
    "            image[slice_number] = slope * image[slice_number].astype(np.float64)\n",
    "            image[slice_number] = image[slice_number].astype(np.int16)\n",
    "            \n",
    "        image[slice_number] += np.int16(intercept)\n",
    "    \n",
    "    return np.array(image, dtype=np.int16)\n",
    "\n",
    "def processimage(img):\n",
    "    #function sourced from https://www.kaggle.com/c/data-science-bowl-2017#tutorial\n",
    "    #Standardize the pixel values\n",
    "    mean = np.mean(img)\n",
    "    std = np.std(img)\n",
    "    img = img-mean\n",
    "    img = img/std\n",
    "    #plt.hist(img.flatten(),bins=200)\n",
    "    #plt.show()\n",
    "    #print(thresh_img[366][280:450])\n",
    "    middle = img[100:400,100:400] \n",
    "    mean = np.mean(middle)  \n",
    "    max = np.max(img)\n",
    "    min = np.min(img)\n",
    "    #move the underflow bins\n",
    "    img[img==max]=mean\n",
    "    img[img==min]=mean\n",
    "    kmeans = KMeans(n_clusters=2).fit(np.reshape(middle,[np.prod(middle.shape),1]))\n",
    "    centers = sorted(kmeans.cluster_centers_.flatten())\n",
    "    threshold = np.mean(centers)\n",
    "    thresh_img = np.where(img<threshold,1.0,0.0)  # threshold the image\n",
    "    eroded = morphology.erosion(thresh_img,np.ones([4,4]))\n",
    "    dilation = morphology.dilation(eroded,np.ones([10,10]))\n",
    "    labels = measure.label(dilation)\n",
    "    label_vals = np.unique(labels)\n",
    "    #plt.imshow(labels)\n",
    "    #plt.show()\n",
    "    labels = measure.label(dilation)\n",
    "    label_vals = np.unique(labels)\n",
    "    regions = measure.regionprops(labels)\n",
    "    good_labels = []\n",
    "    for prop in regions:\n",
    "        B = prop.bbox\n",
    "        if B[2]-B[0]<475 and B[3]-B[1]<475 and B[0]>40 and B[2]<472:\n",
    "            good_labels.append(prop.label)\n",
    "    mask = np.ndarray([512,512],dtype=np.int8)\n",
    "    mask[:] = 0\n",
    "    #\n",
    "    #  The mask here is the mask for the lungs--not the nodes\n",
    "    #  After just the lungs are left, we do another large dilation\n",
    "    #  in order to fill in and out the lung mask \n",
    "    #\n",
    "    for N in good_labels:\n",
    "        mask = mask + np.where(labels==N,1,0)\n",
    "    mask = morphology.dilation(mask,np.ones([10,10])) # one last dilation\n",
    "    return mask*img\n",
    "\n",
    "def processimagenomask(img):\n",
    "    #Standardize the pixel values\n",
    "    mean = np.mean(img)\n",
    "    std = np.std(img)\n",
    "    img = img-mean\n",
    "    img = img/std\n",
    "    #plt.hist(img.flatten(),bins=200)\n",
    "    #plt.show()\n",
    "    #print(thresh_img[366][280:450])\n",
    "    middle = img[100:400,100:400] \n",
    "    mean = np.mean(middle)  \n",
    "    max = np.max(img)\n",
    "    min = np.min(img)\n",
    "    #move the underflow bins\n",
    "    img[img==max]=mean\n",
    "    img[img==min]=mean\n",
    "    return img\n",
    "\n",
    "def processimagefromfile(ppix):\n",
    "    processpix=np.ndarray([ppix.shape[0],1,512,512])\n",
    "    for i in range(ppix.shape[0]):\n",
    "        processpix[i,0]=processimage(ppix[i])\n",
    "    return processpix\n",
    "\n",
    "#predict mask from images\n",
    "def predictmask(images):\n",
    "    num_test=images.shape[0]\n",
    "    imgs_mask_test = np.ndarray([num_test,1,512,512],dtype=np.float32)\n",
    "    for i in range(num_test):\n",
    "        imgs_mask_test[i] = unet_model.predict([images[i:i+1]], verbose=0)[0]\n",
    "    return imgs_mask_test\n",
    "\n",
    "#find number of slices where a nodule is detected\n",
    "def getnoduleindex(imgs_mask_test):\n",
    "    masksum=[np.sum(maskslice[0]) for maskslice in imgs_mask_test]\n",
    "    return [i for i in range(len(masksum)) if masksum[i]>5]\n",
    "\n",
    "def trueindicies(processed_pix, noduleindex):\n",
    "    noduleimgs=[processed_pix[ind] for ind in noduleindex]\n",
    "    noduleimgs=np.array(noduleimgs)\n",
    "    predictions=model.predict(noduleimgs)\n",
    "    predictions=predictions[:len(predictions),1]\n",
    "    predictions[predictions>0.5]=True\n",
    "    predictions[predictions<0.5]=False\n",
    "    trueindicies=[ind for i,ind in enumerate(noduleindex) if predictions[i]==1]\n",
    "    return trueindicies\n",
    "\n",
    "def thresholdnodules(noduleindices,mask):\n",
    "    nodulearea=[]\n",
    "    for ind in trueindicies:\n",
    "        nodulearea.append(np.sum(mask[ind]))\n",
    "    return nodulearea\n",
    "\n",
    "def largestnodulecoordinates(mask):\n",
    "    #mask=nodulemasks[indx,0][0]\n",
    "    mask[mask>0.5]=1\n",
    "    mask[mask<0.5]=0\n",
    "    labeled_array,nf=label(mask)\n",
    "    areasinslice=[]\n",
    "    if nf>1:\n",
    "        for n in range(nf):\n",
    "            lab=np.array(labeled_array)\n",
    "            lab[lab!=(n+1)]=0\n",
    "            lab[lab==(n+1)]=1\n",
    "            areasinslice.append(np.sum(lab))\n",
    "        nlargest=areasinslice.index(max(areasinslice))\n",
    "        labeled_array[labeled_array!=(nlargest+1)]=0\n",
    "        com=center_of_mass(labeled_array)\n",
    "    else:\n",
    "        com=center_of_mass(mask)\n",
    "    return [int(com[0]),int(com[1])]\n",
    "\n",
    "def largestnodulearea(mask,table,i):\n",
    "    #mask=nodulemasks[indx,0][0]\n",
    "    mask[mask>0.5]=1\n",
    "    mask[mask<0.5]=0\n",
    "    labeled_array,nf=label(mask)\n",
    "    areasinslice=[]\n",
    "    if nf>1:\n",
    "        for n in range(nf):\n",
    "            lab=np.array(labeled_array)\n",
    "            lab[lab!=(n+1)]=0\n",
    "            lab[lab==(n+1)]=1\n",
    "            areasinslice.append(np.sum(lab))\n",
    "        #nlargest=areasinslice.index(max(areasinslice))\n",
    "        #labeled_array[labeled_array!=(nlargest+1)]=0\n",
    "        return max(areasinslice)\n",
    "    else:\n",
    "        return table[\"Area\"][i]\n",
    "\n",
    "def crop_nodule(coord,image):\n",
    "    dim=32\n",
    "    return image[coord[0]-dim:coord[0]+dim,coord[1]-dim:coord[1]+dim]\n",
    "#output: 64x64 images of the nodules with malignancy labels from the patient\n",
    "\n",
    "def largestnoduleproperties(mask):\n",
    "    mask[mask>0.5]=1\n",
    "    mask[mask<0.5]=0\n",
    "    mask=mask.astype(np.int8)\n",
    "    labeled_array,nf=label(mask)\n",
    "    areasinslice=[]\n",
    "    if nf>1:\n",
    "        for n in range(nf):\n",
    "            lab=np.array(labeled_array)\n",
    "            lab[lab!=(n+1)]=0\n",
    "            lab[lab==(n+1)]=1\n",
    "            areasinslice.append(np.sum(lab))\n",
    "        nlargest=areasinslice.index(max(areasinslice))\n",
    "        labeled_array[labeled_array!=(nlargest+1)]=0\n",
    "        noduleprops=regionprops(labeled_array)\n",
    "    else:\n",
    "        noduleprops=regionprops(mask)\n",
    "    area=noduleprops[0].area\n",
    "    eccentricity=noduleprops[0].eccentricity\n",
    "    diam=noduleprops[0].equivalent_diameter\n",
    "    diammajor=noduleprops[0].major_axis_length\n",
    "    spiculation=noduleprops[0].solidity\n",
    "    return area, eccentricity, diam, diammajor, spiculation\n",
    "\n",
    "def generatefeaturetable(nodulemasks):\n",
    "    meannoduleHU=[]\n",
    "    nodulecount=[]\n",
    "    largestarealist=[]\n",
    "    eccentricitylist=[]\n",
    "    diamlist=[]\n",
    "    diammajorlist=[]\n",
    "    spiculationlist=[]\n",
    "\n",
    "    for i in range(nodulemasks.shape[0]):\n",
    "        mask=nodulemasks[i,0]\n",
    "        mask[mask>0.5]=1\n",
    "        mask[mask<0.5]=0\n",
    "        meannoduleHU.append(np.sum(noduleimages[i,0]*mask)/np.sum(mask))\n",
    "        labeled_array,features=label(mask)\n",
    "        nodulecount.append(features)\n",
    "        area, eccentricity, diam, diammajor, spiculation = largestnoduleproperties(nodulemasks[i,0])\n",
    "        largestarealist.append(area)\n",
    "        eccentricitylist.append(eccentricity)\n",
    "        diamlist.append(diam)\n",
    "        diammajorlist.append(diammajor)\n",
    "        spiculationlist.append(spiculation)\n",
    "    table=pd.DataFrame({\"Patient\":sample,\"NoduleIndex\":noduleindicies,\"Area\":area,\"MeanHU\":meannoduleHU, \"LargestNoduleArea\":largestarealist,\n",
    "                    \"Eccentricity\":eccentricitylist, \"Diameter\":diamlist, \"DiameterMajor\":diammajorlist, \"Spiculation\":spiculationlist})\n",
    "    return table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing patient# 1200 ETA: 13.88888888888889 hrs\n",
      "Processing patient# 1201 ETA: 12.623715912898382 hrs\n",
      "Processing patient# 1202 ETA: 10.018525106410186 hrs\n",
      "Processing patient# 1203 ETA: 18.164322644339666 hrs\n",
      "Processing patient# 1204 ETA: 15.95403834571441 hrs\n",
      "Processing patient# 1205 ETA: 14.166520277659098 hrs\n",
      "Processing patient# 1206 ETA: 13.91482162181978 hrs\n",
      "Processing patient# 1208 ETA: 13.044705638507056 hrs\n",
      "Processing patient# 1209 ETA: 11.268210861565155 hrs\n",
      "Processing patient# 1210 ETA: 11.153029166426924 hrs\n",
      "Processing patient# 1211 ETA: 10.874260760220615 hrs\n",
      "Processing patient# 1212 ETA: 10.507523334208463 hrs\n",
      "Processing patient# 1213 ETA: 10.314581797112767 hrs\n",
      "Processing patient# 1214 ETA: 10.08706813660406 hrs\n",
      "Processing patient# 1216 ETA: 9.996492877094832 hrs\n",
      "Processing patient# 1217 ETA: 9.102958305653404 hrs\n",
      "Processing patient# 1219 ETA: 8.925979718978024 hrs\n",
      "Processing patient# 1220 ETA: 8.507819953064123 hrs\n",
      "Processing patient# 1221 ETA: 8.645915125431838 hrs\n",
      "Processing patient# 1222 ETA: 8.543059910430449 hrs\n",
      "Processing patient# 1223 ETA: 8.726262301500293 hrs\n",
      "Processing patient# 1225 ETA: 8.727355263285615 hrs\n",
      "Processing patient# 1226 ETA: 8.455341982405919 hrs\n",
      "Processing patient# 1227 ETA: 8.351120784155135 hrs\n",
      "Processing patient# 1228 ETA: 8.316320655887562 hrs\n",
      "Processing patient# 1229 ETA: 8.488186760167965 hrs\n",
      "Processing patient# 1230 ETA: 8.393290068418892 hrs\n",
      "Processing patient# 1231 ETA: 8.380940078535387 hrs\n",
      "Processing patient# 1232 ETA: 8.325024080611765 hrs\n",
      "Processing patient# 1233 ETA: 8.342425880022724 hrs\n",
      "Processing patient# 1234 ETA: 8.32099210145232 hrs\n",
      "Processing patient# 1235 ETA: 8.283116624014719 hrs\n",
      "Processing patient# 1236 ETA: 8.308585265549246 hrs\n",
      "Processing patient# 1237 ETA: 8.243522517545086 hrs\n",
      "Processing patient# 1238 ETA: 8.686181035109781 hrs\n",
      "Processing patient# 1239 ETA: 8.866050753437216 hrs\n",
      "Processing patient# 1240 ETA: 8.860589005996784 hrs\n",
      "Processing patient# 1241 ETA: 8.904041662632933 hrs\n",
      "Processing patient# 1242 ETA: 8.81039298651395 hrs\n",
      "Processing patient# 1244 ETA: 8.987819228849979 hrs\n",
      "Processing patient# 1245 ETA: 8.728126824490818 hrs\n",
      "Processing patient# 1247 ETA: 8.675649444095177 hrs\n",
      "Processing patient# 1248 ETA: 8.429746265785287 hrs\n",
      "Processing patient# 1249 ETA: 8.358244928213475 hrs\n",
      "Processing patient# 1251 ETA: 8.380413904567561 hrs\n",
      "Processing patient# 1252 ETA: 8.127052396800783 hrs\n",
      "Processing patient# 1253 ETA: 8.06759037937758 hrs\n",
      "Processing patient# 1255 ETA: 8.08903314394838 hrs\n",
      "Processing patient# 1256 ETA: 7.984299782819692 hrs\n",
      "Processing patient# 1257 ETA: 7.922037844325599 hrs\n",
      "Processing patient# 1258 ETA: 7.861369710803259 hrs\n",
      "Processing patient# 1259 ETA: 7.8176956071153185 hrs\n",
      "Processing patient# 1260 ETA: 7.7597010760285245 hrs\n",
      "Processing patient# 1261 ETA: 7.840702676859926 hrs\n",
      "Processing patient# 1262 ETA: 7.801467700129554 hrs\n",
      "Processing patient# 1263 ETA: 7.790705240697878 hrs\n",
      "Processing patient# 1264 ETA: 7.734633472519409 hrs\n",
      "Processing patient# 1265 ETA: 7.703382088404434 hrs\n",
      "Processing patient# 1266 ETA: 7.695265196908001 hrs\n",
      "Processing patient# 1267 ETA: 7.699010734739984 hrs\n",
      "Processing patient# 1268 ETA: 7.6863632827208335 hrs\n",
      "Processing patient# 1270 ETA: 7.630932073109392 hrs\n",
      "Processing patient# 1272 ETA: 7.533154250803129 hrs\n",
      "Processing patient# 1273 ETA: 7.3653022840142786 hrs\n",
      "Processing patient# 1274 ETA: 7.319828108404134 hrs\n",
      "Processing patient# 1276 ETA: 7.362176086990922 hrs\n",
      "Processing patient# 1277 ETA: 7.265992607554316 hrs\n",
      "Processing patient# 1278 ETA: 7.218131187677043 hrs\n",
      "Processing patient# 1279 ETA: 7.247058704429203 hrs\n",
      "Processing patient# 1280 ETA: 7.2205354031175375 hrs\n",
      "Processing patient# 1284 ETA: 7.229128276611714 hrs\n",
      "Processing patient# 1285 ETA: 6.863715362470914 hrs\n",
      "Processing patient# 1286 ETA: 6.868215039321618 hrs\n",
      "Processing patient# 1287 ETA: 6.872994949424861 hrs\n",
      "Processing patient# 1288 ETA: 6.876982837402911 hrs\n",
      "Processing patient# 1289 ETA: 6.8752770751245915 hrs\n",
      "Processing patient# 1290 ETA: 6.840804919596807 hrs\n",
      "Processing patient# 1291 ETA: 6.811198504381564 hrs\n",
      "Processing patient# 1292 ETA: 6.77358079140385 hrs\n",
      "Processing patient# 1293 ETA: 6.830506550602018 hrs\n",
      "Processing patient# 1294 ETA: 6.802673126880441 hrs\n",
      "Processing patient# 1297 ETA: 6.911592493140907 hrs\n",
      "Processing patient# 1298 ETA: 6.715936135601024 hrs\n",
      "Processing patient# 1299 ETA: 6.67262925623777 hrs\n",
      "Processing patient# 1300 ETA: 6.629177303820848 hrs\n",
      "Processing patient# 1301 ETA: 6.606589229476728 hrs\n",
      "Processing patient# 1302 ETA: 6.584290691885554 hrs\n",
      "Processing patient# 1304 ETA: 6.567115247555663 hrs\n",
      "Processing patient# 1306 ETA: 6.466728328926854 hrs\n",
      "Processing patient# 1307 ETA: 6.3809614388296545 hrs\n",
      "Processing patient# 1308 ETA: 6.346115140000612 hrs\n",
      "Processing patient# 1309 ETA: 6.356010770753983 hrs\n",
      "Processing patient# 1310 ETA: 6.314931866174394 hrs\n",
      "Processing patient# 1311 ETA: 6.307182563506328 hrs\n",
      "Processing patient# 1313 ETA: 6.279791420257635 hrs\n",
      "Processing patient# 1314 ETA: 6.292742651619061 hrs\n",
      "Processing patient# 1315 ETA: 6.269811408600369 hrs\n",
      "Processing patient# 1316 ETA: 6.250402978571325 hrs\n",
      "Processing patient# 1318 ETA: 6.2339090946913895 hrs\n",
      "Processing patient# 1319 ETA: 6.160530481745883 hrs\n",
      "Processing patient# 1320 ETA: 6.130471623537166 hrs\n",
      "Processing patient# 1321 ETA: 6.121893712554752 hrs\n",
      "Processing patient# 1323 ETA: 6.089000249715125 hrs\n",
      "Processing patient# 1324 ETA: 5.980023680922165 hrs\n",
      "Processing patient# 1325 ETA: 5.956943183755874 hrs\n",
      "Processing patient# 1327 ETA: 5.946134261675508 hrs\n",
      "Processing patient# 1328 ETA: 5.850976662812754 hrs\n",
      "Processing patient# 1329 ETA: 5.820975284885477 hrs\n",
      "Processing patient# 1331 ETA: 5.8138491802821815 hrs\n",
      "Processing patient# 1333 ETA: 5.725956927912203 hrs\n",
      "Processing patient# 1334 ETA: 5.628774382734477 hrs\n",
      "Processing patient# 1335 ETA: 5.606204076225375 hrs\n",
      "Processing patient# 1336 ETA: 5.579955613982735 hrs\n",
      "Processing patient# 1337 ETA: 5.590139669641091 hrs\n",
      "Processing patient# 1338 ETA: 5.566476827514825 hrs\n",
      "Processing patient# 1339 ETA: 5.543931497014302 hrs\n",
      "Processing patient# 1341 ETA: 5.524890386738948 hrs\n",
      "Processing patient# 1342 ETA: 5.448634859161664 hrs\n",
      "Processing patient# 1343 ETA: 5.417805575740921 hrs\n",
      "Processing patient# 1344 ETA: 5.400687092230369 hrs\n",
      "Processing patient# 1345 ETA: 5.3883224553303695 hrs\n",
      "Processing patient# 1346 ETA: 5.361380771469307 hrs\n",
      "Processing patient# 1348 ETA: 5.347543323521171 hrs\n",
      "Processing patient# 1349 ETA: 5.270577565466265 hrs\n",
      "Processing patient# 1350 ETA: 5.250531795351592 hrs\n",
      "Processing patient# 1351 ETA: 5.227737224219562 hrs\n",
      "Processing patient# 1352 ETA: 5.204326420002862 hrs\n",
      "Processing patient# 1353 ETA: 5.186513844776084 hrs\n",
      "Processing patient# 1354 ETA: 5.1599767990571594 hrs\n",
      "Processing patient# 1355 ETA: 5.1349731775509415 hrs\n",
      "Processing patient# 1356 ETA: 5.108270640927247 hrs\n",
      "Processing patient# 1357 ETA: 5.115111228089255 hrs\n",
      "Processing patient# 1358 ETA: 5.096474325358868 hrs\n",
      "Processing patient# 1359 ETA: 5.068261158612955 hrs\n",
      "Processing patient# 1360 ETA: 5.042548690814938 hrs\n",
      "Processing patient# 1361 ETA: 5.01235181745535 hrs\n",
      "Processing patient# 1362 ETA: 4.998160559473705 hrs\n",
      "Processing patient# 1363 ETA: 4.974829404645009 hrs\n",
      "Processing patient# 1364 ETA: 4.952033427223199 hrs\n",
      "Processing patient# 1365 ETA: 4.928263342958509 hrs\n",
      "Processing patient# 1366 ETA: 4.906512433510309 hrs\n",
      "Processing patient# 1368 ETA: 4.884942292143961 hrs\n",
      "Processing patient# 1369 ETA: 4.82783218746433 hrs\n",
      "Processing patient# 1370 ETA: 4.8466363209135395 hrs\n",
      "Processing patient# 1371 ETA: 4.8318471228541915 hrs\n",
      "Processing patient# 1372 ETA: 4.821795012823449 hrs\n",
      "Processing patient# 1373 ETA: 4.803408985386005 hrs\n",
      "Processing patient# 1374 ETA: 4.777427026608132 hrs\n",
      "Processing patient# 1375 ETA: 4.763656514637054 hrs\n",
      "Processing patient# 1376 ETA: 4.743936037966472 hrs\n",
      "Processing patient# 1377 ETA: 4.719618169200248 hrs\n",
      "Processing patient# 1378 ETA: 4.690835314464851 hrs\n",
      "Processing patient# 1379 ETA: 4.666089715291667 hrs\n",
      "Processing patient# 1380 ETA: 4.675125468286835 hrs\n",
      "Processing patient# 1381 ETA: 4.658687751179754 hrs\n",
      "Processing patient# 1382 ETA: 4.635416509547295 hrs\n",
      "Processing patient# 1383 ETA: 4.618964403185181 hrs\n",
      "Processing patient# 1384 ETA: 4.5948020707891475 hrs\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing patient# 1385 ETA: 4.570799427429835 hrs\n",
      "Processing patient# 1386 ETA: 4.54748295711004 hrs\n",
      "Processing patient# 1387 ETA: 4.527965035849359 hrs\n",
      "Processing patient# 1388 ETA: 4.499852827242081 hrs\n",
      "Processing patient# 1389 ETA: 4.477454396751192 hrs\n",
      "Processing patient# 1390 ETA: 4.456050586798038 hrs\n",
      "Processing patient# 1391 ETA: 4.434381366932787 hrs\n",
      "Processing patient# 1392 ETA: 4.407015767265111 hrs\n",
      "Processing patient# 1394 ETA: 4.37906760837885 hrs\n",
      "Processing patient# 1395 ETA: 4.30689841369618 hrs\n",
      "Processing patient# 1396 ETA: 4.285369354544922 hrs\n",
      "Processing patient# 1397 ETA: 4.268326153216628 hrs\n",
      "Processing patient# 1398 ETA: 4.263744453107467 hrs\n",
      "Processing patient# 1399 ETA: 4.243092987329717 hrs\n",
      "Processing patient# 1400 ETA: 4.222136327758431 hrs\n",
      "Processing patient# 1401 ETA: 4.198838181790889 hrs\n",
      "Processing patient# 1402 ETA: 4.1793994686896 hrs\n",
      "Processing patient# 1403 ETA: 4.165873683685152 hrs\n",
      "Processing patient# 1404 ETA: 4.145204392498549 hrs\n",
      "Processing patient# 1405 ETA: 4.127004841931144 hrs\n",
      "Processing patient# 1406 ETA: 4.1010385624992045 hrs\n",
      "Processing patient# 1407 ETA: 4.089603952259641 hrs\n",
      "Processing patient# 1410 ETA: 4.0657218732484255 hrs\n",
      "Processing patient# 1411 ETA: 3.9614282328307855 hrs\n",
      "Processing patient# 1412 ETA: 3.935801912909784 hrs\n",
      "Processing patient# 1414 ETA: 3.9153825925978505 hrs\n",
      "Processing patient# 1415 ETA: 3.8567507741617604 hrs\n",
      "Processing patient# 1417 ETA: 3.831965497984502 hrs\n",
      "Processing patient# 1418 ETA: 3.776563055347595 hrs\n",
      "Processing patient# 1419 ETA: 3.7698241562507033 hrs\n",
      "Processing patient# 1420 ETA: 3.7446089217382843 hrs\n",
      "Processing patient# 1421 ETA: 3.7240907096557123 hrs\n",
      "Processing patient# 1422 ETA: 3.7038961130109156 hrs\n",
      "Processing patient# 1423 ETA: 3.682920000244269 hrs\n",
      "Processing patient# 1424 ETA: 3.6590023383365153 hrs\n",
      "Processing patient# 1425 ETA: 3.6355938848919345 hrs\n",
      "Processing patient# 1426 ETA: 3.6216272131716254 hrs\n",
      "Processing patient# 1427 ETA: 3.6168983707203903 hrs\n",
      "Processing patient# 1428 ETA: 3.5952787640760513 hrs\n",
      "Processing patient# 1429 ETA: 3.5846346884957216 hrs\n",
      "Processing patient# 1430 ETA: 3.564002736003503 hrs\n",
      "Processing patient# 1431 ETA: 3.5397180199405214 hrs\n",
      "Processing patient# 1432 ETA: 3.5171189450058673 hrs\n",
      "Processing patient# 1433 ETA: 3.495935878656453 hrs\n",
      "Processing patient# 1436 ETA: 3.490441271786393 hrs\n",
      "Processing patient# 1437 ETA: 3.3961545779969957 hrs\n",
      "Processing patient# 1438 ETA: 3.3754478486011674 hrs\n",
      "Processing patient# 1439 ETA: 3.3569018378051583 hrs\n",
      "Processing patient# 1440 ETA: 3.3322680721583744 hrs\n",
      "Processing patient# 1441 ETA: 3.3100857617839687 hrs\n",
      "Processing patient# 1443 ETA: 3.291559807825679 hrs\n",
      "Processing patient# 1444 ETA: 3.2368162911495224 hrs\n",
      "Processing patient# 1445 ETA: 3.219285455204192 hrs\n",
      "Processing patient# 1446 ETA: 3.1963537533862594 hrs\n",
      "Processing patient# 1447 ETA: 3.1745961232535005 hrs\n",
      "Processing patient# 1448 ETA: 3.161650590983931 hrs\n",
      "Processing patient# 1449 ETA: 3.1433429676488482 hrs\n",
      "Processing patient# 1450 ETA: 3.1198210242682034 hrs\n",
      "Processing patient# 1451 ETA: 3.0976603277366 hrs\n",
      "Processing patient# 1452 ETA: 3.0779698065120384 hrs\n",
      "Processing patient# 1453 ETA: 3.0560144474137805 hrs\n",
      "Processing patient# 1454 ETA: 3.035682783060343 hrs\n",
      "Processing patient# 1455 ETA: 3.01512198534926 hrs\n",
      "Processing patient# 1457 ETA: 3.001986470494109 hrs\n",
      "Processing patient# 1460 ETA: 2.952160390772817 hrs\n",
      "Processing patient# 1461 ETA: 2.8634344177440965 hrs\n",
      "Processing patient# 1462 ETA: 2.8440811097260443 hrs\n",
      "Processing patient# 1464 ETA: 2.8232880296876375 hrs\n",
      "Processing patient# 1465 ETA: 2.7719246257226184 hrs\n",
      "Processing patient# 1466 ETA: 2.7521925461187706 hrs\n",
      "Processing patient# 1468 ETA: 2.7326375998926817 hrs\n",
      "Processing patient# 1469 ETA: 2.6889556477504155 hrs\n",
      "Processing patient# 1471 ETA: 2.6669628266321768 hrs\n",
      "Processing patient# 1472 ETA: 2.61743253900286 hrs\n",
      "Processing patient# 1473 ETA: 2.59755473644354 hrs\n",
      "Processing patient# 1474 ETA: 2.580403991718651 hrs\n",
      "Processing patient# 1475 ETA: 2.5592905984358345 hrs\n",
      "Processing patient# 1476 ETA: 2.537063879300142 hrs\n",
      "Processing patient# 1477 ETA: 2.5267651822019 hrs\n",
      "Processing patient# 1478 ETA: 2.5045537118088426 hrs\n",
      "Processing patient# 1479 ETA: 2.481165919057026 hrs\n",
      "Processing patient# 1480 ETA: 2.4596286582390934 hrs\n",
      "Processing patient# 1481 ETA: 2.4357531235283094 hrs\n",
      "Processing patient# 1482 ETA: 2.414465554013931 hrs\n",
      "Processing patient# 1483 ETA: 2.397971823392403 hrs\n",
      "Processing patient# 1484 ETA: 2.3773148383053253 hrs\n",
      "Processing patient# 1485 ETA: 2.359427685907246 hrs\n",
      "Processing patient# 1486 ETA: 2.3374206903567583 hrs\n",
      "Processing patient# 1487 ETA: 2.316515679359436 hrs\n",
      "Processing patient# 1488 ETA: 2.294826008774524 hrs\n",
      "Processing patient# 1489 ETA: 2.2790878480482633 hrs\n",
      "Processing patient# 1490 ETA: 2.2603696225018344 hrs\n",
      "Processing patient# 1491 ETA: 2.239299383374652 hrs\n",
      "Processing patient# 1492 ETA: 2.217807365984541 hrs\n",
      "Processing patient# 1494 ETA: 2.1953265507487028 hrs\n",
      "Processing patient# 1495 ETA: 2.154147859638919 hrs\n",
      "Processing patient# 1496 ETA: 2.1373596109892876 hrs\n",
      "Processing patient# 1498 ETA: 2.1154642674823054 hrs\n",
      "Processing patient# 1499 ETA: 2.0658858120215506 hrs\n",
      "Processing patient# 1500 ETA: 2.0452432717780265 hrs\n",
      "Processing patient# 1501 ETA: 2.025089469167218 hrs\n",
      "Processing patient# 1502 ETA: 2.006434742403083 hrs\n",
      "Processing patient# 1503 ETA: 1.9848314131735696 hrs\n",
      "Processing patient# 1504 ETA: 1.9642185280659386 hrs\n",
      "Processing patient# 1505 ETA: 1.943846245320117 hrs\n",
      "Processing patient# 1506 ETA: 1.922791921977514 hrs\n",
      "Processing patient# 1507 ETA: 1.900297880433324 hrs\n",
      "Processing patient# 1508 ETA: 1.8807857255689255 hrs\n",
      "Processing patient# 1509 ETA: 1.8639425218906893 hrs\n",
      "Processing patient# 1510 ETA: 1.8451056155743986 hrs\n",
      "Processing patient# 1511 ETA: 1.8219857168580944 hrs\n",
      "Processing patient# 1512 ETA: 1.7994089303411904 hrs\n",
      "Processing patient# 1513 ETA: 1.7763814680467143 hrs\n",
      "Processing patient# 1514 ETA: 1.7547966026234776 hrs\n",
      "Processing patient# 1515 ETA: 1.7381065940352345 hrs\n",
      "Processing patient# 1516 ETA: 1.7198453679184118 hrs\n",
      "Processing patient# 1517 ETA: 1.6968657202587767 hrs\n",
      "Processing patient# 1518 ETA: 1.6766352182999515 hrs\n",
      "Processing patient# 1519 ETA: 1.6640672508136443 hrs\n",
      "Processing patient# 1520 ETA: 1.6443906538188457 hrs\n",
      "Processing patient# 1521 ETA: 1.6265579485526016 hrs\n",
      "Processing patient# 1522 ETA: 1.6064969870503643 hrs\n",
      "Processing patient# 1523 ETA: 1.5878849256592276 hrs\n",
      "Processing patient# 1524 ETA: 1.5670396502469308 hrs\n",
      "Processing patient# 1526 ETA: 1.5448135244235013 hrs\n",
      "Processing patient# 1527 ETA: 1.5007179911218453 hrs\n",
      "Processing patient# 1528 ETA: 1.4779989854163593 hrs\n",
      "Processing patient# 1529 ETA: 1.455040394503777 hrs\n",
      "Processing patient# 1530 ETA: 1.4348017759756608 hrs\n",
      "Processing patient# 1531 ETA: 1.412959918495631 hrs\n",
      "Processing patient# 1532 ETA: 1.3895584875255462 hrs\n",
      "Processing patient# 1534 ETA: 1.3681563406241992 hrs\n",
      "Processing patient# 1535 ETA: 1.32147174191119 hrs\n",
      "Processing patient# 1536 ETA: 1.2993491290633876 hrs\n",
      "Processing patient# 1537 ETA: 1.2765257389907825 hrs\n",
      "Processing patient# 1538 ETA: 1.2545430914596223 hrs\n",
      "Processing patient# 1539 ETA: 1.2330312886358439 hrs\n",
      "Processing patient# 1540 ETA: 1.2134439415684322 hrs\n",
      "Processing patient# 1541 ETA: 1.1903008280926082 hrs\n",
      "Processing patient# 1542 ETA: 1.167653547794189 hrs\n",
      "Processing patient# 1543 ETA: 1.1457156837554199 hrs\n",
      "Processing patient# 1544 ETA: 1.1241135936051372 hrs\n",
      "Processing patient# 1545 ETA: 1.102846443816275 hrs\n",
      "Processing patient# 1546 ETA: 1.080637407885313 hrs\n",
      "Processing patient# 1547 ETA: 1.0583609296631968 hrs\n",
      "Processing patient# 1549 ETA: 1.0358784646273498 hrs\n",
      "Processing patient# 1550 ETA: 0.9894958379524097 hrs\n",
      "Processing patient# 1551 ETA: 0.9702932011284904 hrs\n",
      "Processing patient# 1552 ETA: 0.9487030035562142 hrs\n",
      "Processing patient# 1553 ETA: 0.927032014874493 hrs\n",
      "Processing patient# 1555 ETA: 0.9052574532595503 hrs\n",
      "Processing patient# 1556 ETA: 0.8583338576951499 hrs\n",
      "Processing patient# 1557 ETA: 0.8364724836388824 hrs\n",
      "Processing patient# 1558 ETA: 0.8143605039141544 hrs\n",
      "Processing patient# 1559 ETA: 0.7939183002461303 hrs\n",
      "Processing patient# 1560 ETA: 0.7714008516864275 hrs\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing patient# 1561 ETA: 0.7496390868227617 hrs\n",
      "Processing patient# 1562 ETA: 0.7277910622262829 hrs\n",
      "Processing patient# 1563 ETA: 0.70534063768343 hrs\n",
      "Processing patient# 1564 ETA: 0.6836017272867927 hrs\n",
      "Processing patient# 1566 ETA: 0.6618160449642033 hrs\n",
      "Processing patient# 1568 ETA: 0.6160384095808674 hrs\n",
      "Processing patient# 1570 ETA: 0.5702290712717382 hrs\n",
      "Processing patient# 1571 ETA: 0.5249562931103638 hrs\n",
      "Processing patient# 1572 ETA: 0.5033148116076286 hrs\n",
      "Processing patient# 1573 ETA: 0.48112341017214527 hrs\n",
      "Processing patient# 1574 ETA: 0.4594317513380257 hrs\n",
      "Processing patient# 1575 ETA: 0.43778636997364123 hrs\n",
      "Processing patient# 1576 ETA: 0.415779529424036 hrs\n",
      "Processing patient# 1577 ETA: 0.3949588738670405 hrs\n",
      "Processing patient# 1578 ETA: 0.37282882327230255 hrs\n",
      "Processing patient# 1580 ETA: 0.3515907668564129 hrs\n",
      "Processing patient# 1581 ETA: 0.30684781963013846 hrs\n",
      "Processing patient# 1582 ETA: 0.28561606805205886 hrs\n",
      "Processing patient# 1583 ETA: 0.26367890904528596 hrs\n",
      "Processing patient# 1584 ETA: 0.241728710744323 hrs\n",
      "Processing patient# 1585 ETA: 0.2196190606033997 hrs\n",
      "Processing patient# 1587 ETA: 0.19766932272849366 hrs\n",
      "Processing patient# 1588 ETA: 0.1533620524758064 hrs\n",
      "Processing patient# 1589 ETA: 0.1314311540085944 hrs\n",
      "Processing patient# 1590 ETA: 0.10954693088589212 hrs\n",
      "Processing patient# 1591 ETA: 0.08776483735769842 hrs\n",
      "Processing patient# 1592 ETA: 0.06588565785057654 hrs\n",
      "Processing patient# 1594 ETA: 0.04392167075191941 hrs\n",
      "31161.49111223221\n"
     ]
    }
   ],
   "source": [
    "start_time=time.time()\n",
    "\n",
    "elapsed_time=0\n",
    "totaltime=94000\n",
    "thresh=-500 #lower HU threshold for nodule segmentation\n",
    "noduleimages=np.ndarray([5000,1,512,512],dtype=np.float32)\n",
    "nodulemasks=np.ndarray([5000,1,512,512],dtype=np.float32)\n",
    "sample=[]\n",
    "area=[]\n",
    "noduleindicies=[]\n",
    "index=0\n",
    "start=1\n",
    "end=400\n",
    "for i in range(len(patients)):\n",
    "    print(\"Processing patient#\",i,\"ETA:\",(totaltime-elapsed_time)/3600,\"hrs\")\n",
    "    if (i-1)/400-np.floor((i-1)/400)==0:\n",
    "        noduleimages=noduleimages[:index]\n",
    "        nodulemasks=nodulemasks[:index]\n",
    "        table=generatefeaturetable(nodulemasks)\n",
    "        print(\"Saving data for patients\"+str(start)+\"-\"+str(end))\n",
    "        np.save(datafolder+\"DSBNoduleImages\"+str(start)+\"-\"+str(end)+\".npy\",noduleimages)\n",
    "        np.save(datafolder+\"DSBNoduleMasks\"+str(start)+\"-\"+str(end)+\".npy\",nodulemasks)\n",
    "        table.to_csv(datafolder+\"DSBNoduleFeatures\"+str(start)+\"-\"+str(end)+\".csv\")\n",
    "        del noduleimages, nodulemasks\n",
    "        noduleimages=np.ndarray([5000,1,512,512],dtype=np.float32)\n",
    "        nodulemasks=np.ndarray([5000,1,512,512],dtype=np.float32)\n",
    "        sample=[]\n",
    "        area=[]\n",
    "        noduleindicies=[]\n",
    "        index=0  \n",
    "    patient_scan=load_scan(INPUT_FOLDER+patients[i])\n",
    "    patient_pix=get_pixels_hu(patient_scan)\n",
    "    processed_pix = processimagefromfile(patient_pix)\n",
    "    mask = predictmask(processed_pix)\n",
    "    noduleindex = getnoduleindex(mask)\n",
    "    trueinds=trueindicies(processed_pix,noduleindex)\n",
    "\n",
    "    for ind in trueinds:\n",
    "        noduleimages[index,0]=patient_pix[ind]\n",
    "        nodulemasks[index,0]=mask[ind]\n",
    "        sample.append(patients[i])\n",
    "        area.append(np.sum(mask[ind]))\n",
    "        noduleindicies.append(ind)\n",
    "        index+=1\n",
    "\n",
    "    elapsed_time=time.time()-start_time\n",
    "    totaltime=elapsed_time/(i-start+1)*(end-start)\n",
    "\n",
    "\n",
    "\n",
    "print(elapsed_time)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
